%include "Q:\PU2\Macros\hashmerge.sas";
%include "Q:\PU2\Macros\clean_flsa.sas";
%include "Q:\PU2\Macros\hist.sas";
%include "Q:\PU2\Macros\salaried_flsa.sas";
%include "Q:\PU2\Macros\bunched_flsa.sas";
%include "Q:\PU2\Macros\collapsed.sas";
%include "Q:\PU2\Macros\collapse_firm.sas";
%include "Q:\PU2\Macros\collapse_industry.sas";
%include "Q:\PU2\Macros\collapse_size.sas";
%include "Q:\PU2\Macros\firm.sas";
%include "Q:\PU2\Macros\append.sas";

libname temp "Q:\PU2\temp";
libname input "Q:\External Research Data\AutoPay";
libname output "Q:\PU2\New folder";
options threads cpucount=actual;


*Balanced panel of firms from Jan 2016 to Dec 2019;
	proc summary data=output.firm_list(where=(state_bin=1 and year(yr_month)>=2016 and mod(month(yr_month),2)=0)) nway;
		class client_code;
		output out=balanced_firms(drop=_type_ rename=(_freq_=n_firms) where=(n_firms=24));	
	run;
	%hashmerge(data=balanced_firms,
		one=balanced_firms,
		many=output.client_crosswalk,
		by=("client_code"),type=inner);
	%hashmerge(data=balanced_panel,
		one=balanced_firms,
		many=output.panel_collapsed_adp(where=(state_bin=1 and year(yr_month)>=2016 and mod(month(yr_month),2)=0)),
		by=("client_id"),type=inner);

*Number of firms in balanced panel;
	proc summary data=balanced_panel nway;
		class yr_month client_id;
		output out=temp(drop=_type_ _freq_);	
	run;
	proc summary data=temp nway;
		class yr_month;
		output out=balanced_panel_num(drop=_type_ rename=(_freq_=n_firms));	
	run;
	data temp.num_firms_in_long_panel;
		set temp;
	run;

*Collapse panel by salaried-bin over all firms;
	proc summary data=balanced_panel nway;
		class yr_month salaried bin;
		var n;
		output out=balanced_panel_collapsed(drop=_type_ _freq_)
		sum(n)=n;	
	run;

*Collapse panel to 40 dollar bins;
	data balanced_panel_collapsed;
		set balanced_panel_collapsed;
		bin=floor(bin/40)*40;
	run;

	proc summary data=balanced_panel_collapsed nway;
		class yr_month salaried bin;
		var n;
		output out=balanced_panel_collapsed(drop=_type_ _freq_) sum(n)=n;
	run;

*Average by number of firms;
	%hashmerge(data=balanced_panel_collapsed,
		one=balanced_panel_num,
		many=balanced_panel_collapsed,
		by=("yr_month"),type=inner);

	data balanced_panel_collapsed;
		set balanced_panel_collapsed;
		n=n/n_firms;
	run;

*Base month;
	data base(keep=salaried bin n rename=(n=n_base));
		set balanced_panel_collapsed(where=(yr_month='01apr2016'd));
	run;
	%hashmerge(data=temp_graph,
		one=base, many=balanced_panel_collapsed,
		by=("salaried", "bin"),type=inner);

	data temp_graph;
		set temp_graph;
		date=yr_month;
		format date monyy7.;
		diff_n=n-n_base;
		month=month(yr_month);

		if yr_month='01jan2016'd then yr_month='31jan2016'd;
		if yr_month='01feb2016'd then yr_month='29feb2016'd;
		if yr_month='01mar2016'd then yr_month='31mar2016'd;
		if yr_month='01apr2016'd then yr_month='30apr2016'd;
		if yr_month='01may2016'd then yr_month='31may2016'd;
		if yr_month='01jun2016'd then yr_month='30jun2016'd;
		if yr_month='01jul2016'd then yr_month='31jul2016'd;
		if yr_month='01aug2016'd then yr_month='31aug2016'd;
		if yr_month='01sep2016'd then yr_month='30sep2016'd;
		if yr_month='01oct2016'd then yr_month='31oct2016'd;
		if yr_month='01nov2016'd then yr_month='30nov2016'd;
		if yr_month='01dec2016'd then yr_month='31dec2016'd;

		if yr_month='01jan2017'd then yr_month='31jan2017'd;
		if yr_month='01feb2017'd then yr_month='28feb2017'd;
		if yr_month='01mar2017'd then yr_month='31mar2017'd;
		if yr_month='01apr2017'd then yr_month='30apr2017'd;
		if yr_month='01may2017'd then yr_month='31may2017'd;
		if yr_month='01jun2017'd then yr_month='30jun2017'd;
		if yr_month='01jul2017'd then yr_month='31jul2017'd;
		if yr_month='01aug2017'd then yr_month='31aug2017'd;
		if yr_month='01sep2017'd then yr_month='30sep2017'd;
		if yr_month='01oct2017'd then yr_month='31oct2017'd;
		if yr_month='01nov2017'd then yr_month='30nov2017'd;
		if yr_month='01dec2017'd then yr_month='31dec2017'd;

		if yr_month='01feb2018'd then yr_month='28feb2018'd;
		if yr_month='01apr2018'd then yr_month='30apr2018'd;
		if yr_month='01jun2018'd then yr_month='30jun2018'd;
		if yr_month='01aug2018'd then yr_month='31aug2018'd;
		if yr_month='01oct2018'd then yr_month='31oct2018'd;
		if yr_month='01dec2018'd then yr_month='31dec2018'd;

		if yr_month='01feb2019'd then yr_month='28feb2019'd;
		if yr_month='01apr2019'd then yr_month='30apr2019'd;
		if yr_month='01jun2019'd then yr_month='30jun2019'd;
		if yr_month='01aug2019'd then yr_month='31aug2019'd;
		if yr_month='01oct2019'd then yr_month='31oct2019'd;
		if yr_month='01dec2019'd then yr_month='31dec2019'd;
	run;

*Program to plot outcome against bin;
%macro sgplot(data=,sal=,col=,row=,var=,a=,b=,ymin=,ymax=,title=,year=);
	proc sgpanel data=&data.(where=(salaried=&sal. and bin<=1500));
		panelby yr_month / columns=&col. rows=&row. novarname;
		series x=bin y=&var. / markers markerattrs=(size=1.35mm);
		refline &a. / axis=x lineattrs=(color=black pattern=dash) transparency=0.2;
		refline &b. / axis=x lineattrs=(color=red pattern=dash) transparency=0.2;
		refline 0 / axis=y transparency=0.2;
		%if &sal.=1 %then %do;	
			rowaxis min=-0.75 max=1 label="Difference in Number of Salaried Workers From &year.";
		%end;
		%if &sal.=0 %then %do;	
			rowaxis min=-2 max=-2 label="Difference in Number of Hourly Workers From &year.";
		%end;
		colaxis label="Weekly Base Pay";
		title &title.;
	run;
%mend;


ods graphics / reset=all width=10in border=off; 
*2016 FLSA;
	%sgplot(data=temp_graph,sal=1,col=6,row=4,var=diff_n,a=460,b=920,ymin=-0.75,ymax=0.8,
		title="Difference in Distribution of Base Pay by April 2016, Salaried Workers",year=April 2016);
	%sgplot(data=temp_graph,sal=0,col=6,row=4,var=diff_n,a=460,b=920,ymin=-1,ymax=-1,
		title="Difference in Distribution of Base Pay by April 2016, Hourly Workers",year=April 2016);

